#!/usr/bin/env python
#-*- coding:utf-8 -*-
import urllib2
from bs4 import BeautifulSoup
import socket

# Root URL of the site being scraped; listing-page URLs are built from it.
baseurl = "http://dbmeizi.com/"

# Pretend to be a browser so the server is less likely to block the script.
# NOTE(review): the value starts with a stray "(windowsMozilla/5.0" fragment,
# which looks like a paste error; it is reproduced unchanged here.
req_header = {
    'User-Agent': (
        'Mozilla/5.0 (windowsMozilla/5.0 (X11; Linux i686) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 '
        'Safari/537.36'
    ),
}

# Timeout (seconds) handed to urllib2.urlopen() for every request.
req_timeout = 20
def user_agent(url, retries=3):
    """Open *url* with the browser-like request headers and return the response.

    Args:
        url: Address to fetch.
        retries: Number of additional attempts made after a timeout.

    Returns:
        The file-like response object returned by ``urllib2.urlopen`` (the
        caller is responsible for reading and closing it), or ``None`` when
        the request failed or every attempt timed out.
    """
    attempts_left = retries + 1
    while attempts_left > 0:
        attempts_left -= 1
        try:
            req = urllib2.Request(url, None, req_header)
            return urllib2.urlopen(req, None, req_timeout)
        except urllib2.URLError as e:
            # A timeout while connecting is delivered wrapped in URLError;
            # only that case is worth retrying. Anything else (HTTP errors,
            # DNS failures, ...) is reported and abandoned.
            if not isinstance(getattr(e, 'reason', None), socket.timeout):
                # str(e) carries the reason; e.message is usually empty.
                print(e)
                return None
        except socket.timeout:
            # Timeout while reading the response headers: retry.
            pass
    print('giving up on %s after repeated timeouts' % url)
    return None

# Grand total of the per-page counts returned by page_loop(); accumulated by
# the crawl loop at the bottom of the script.
total = 0
def _save_image(url, save_dir):
    """Download *url* into *save_dir*; return True if a file was written."""
    import os

    if not url:
        # The <img> tag did not carry this attribute; nothing to fetch.
        return False
    response = user_agent(url)
    if response is None:
        return False
    try:
        content = response.read()
    finally:
        response.close()
    # Keep the existing naming scheme (last 11 characters of the URL) but
    # drop anything up to a '/' so the name can never point into a
    # sub-directory that does not exist.
    filename = url[-11:].rsplit('/', 1)[-1]
    if not filename:
        return False
    with open(os.path.join(save_dir, filename), 'wb') as out:
        out.write(content)
    return True


def page_loop(pageid, save_dir='/home/web5/webimg/'):
    """Download every image referenced on one listing page.

    For each ``<img>`` tag on the page, both the ``src`` image and the
    ``data-bigimg`` image are fetched and written to *save_dir*.

    Args:
        pageid: Value substituted into the ``?p=`` query parameter.
        save_dir: Directory the image files are written to.

    Returns:
        The number of image files actually saved from this page.
    """
    # baseurl may already end in '/'; strip it to avoid 'host//?p=...'.
    url = '%s/?p=%s' % (baseurl.rstrip('/'), pageid)
    print(url)
    page = user_agent(url)
    if page is None:
        # The listing page could not be fetched; nothing to download.
        return 0
    try:
        soup = BeautifulSoup(page)
    finally:
        page.close()

    total_img = 0
    for myimg in soup.find_all('img'):
        links = (myimg.get('src'), myimg.get('data-bigimg'))
        for link in links:
            print(link)
        for link in links:
            # Count only files that were really written to disk.
            if _save_image(link, save_dir):
                total_img += 1
    return total_img
# Crawl the listing pages start_page .. stop_page - 1 (range() excludes the
# stop value) and report the accumulated count once all pages are done.
# NOTE(review): if stop_page is meant to be inclusive, the last page is
# currently skipped - confirm the intended range.
start_page = 407
stop_page = 425
for pageid in range(start_page, stop_page):
    saved = page_loop(pageid)
    total = total + saved
print(total)
